{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集大小: 13209\n",
      "验证集大小: 3303\n",
      "测试集大小: 4128\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from torch.utils.data import DataLoader, Dataset\n",
    "import os\n",
    "\n",
    "# 设置中文字体支持\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签\n",
    "plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号\n",
    "\n",
    "# 加载加利福尼亚房价数据集\n",
    "housing = fetch_california_housing()\n",
    "X = housing.data\n",
    "y = housing.target\n",
    "\n",
    "# 数据拆分：训练集(60%)、验证集(20%)、测试集(20%)\n",
    "X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42)  # 0.25 x 0.8 = 0.2\n",
    "\n",
    "print(f\"训练集大小: {X_train.shape[0]}\")\n",
    "print(f\"验证集大小: {X_val.shape[0]}\")\n",
    "print(f\"测试集大小: {X_test.shape[0]}\")\n",
    "\n",
    "# 数据标准化\n",
    "scaler = StandardScaler()\n",
    "X_train_scaled = scaler.fit_transform(X_train)\n",
    "X_val_scaled = scaler.transform(X_val)\n",
    "X_test_scaled = scaler.transform(X_test)\n",
    "\n",
    "# 自定义数据集类\n",
    "class HousingDataset(Dataset):\n",
    "    def __init__(self, features, targets):\n",
    "        self.features = torch.FloatTensor(features)\n",
    "        self.targets = torch.FloatTensor(targets).view(-1, 1)\n",
    "        \n",
    "    def __len__(self):\n",
    "        return len(self.features) #返回样本数量\n",
    "    \n",
    "    def __getitem__(self, idx): #传入索引，返回对应索引样本的特征和目标\n",
    "        return self.features[idx], self.targets[idx]\n",
    "\n",
    "# 创建数据集实例\n",
    "train_dataset = HousingDataset(X_train_scaled, y_train)\n",
    "val_dataset = HousingDataset(X_val_scaled, y_val)\n",
    "test_dataset = HousingDataset(X_test_scaled, y_test)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 加载数据，构建模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RegressionModel(\n",
      "  (layer1): Linear(in_features=8, out_features=30, bias=True)\n",
      "  (activation): ReLU()\n",
      "  (output): Linear(in_features=30, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 创建数据加载器\n",
    "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
    "val_loader = DataLoader(val_dataset, batch_size=32)\n",
    "test_loader = DataLoader(test_dataset, batch_size=32)\n",
    "\n",
    "# 定义神经网络模型\n",
    "class RegressionModel(nn.Module):\n",
    "    def __init__(self, input_dim):\n",
    "        super(RegressionModel, self).__init__()\n",
    "        self.layer1 = nn.Linear(input_dim, 30)\n",
    "        self.activation = nn.ReLU()\n",
    "        self.output = nn.Linear(30, 1)\n",
    "        \n",
    "    def forward(self, x):\n",
    "        x = self.activation(self.layer1(x))\n",
    "        x = self.output(x)\n",
    "        return x\n",
    "\n",
    "# 初始化模型、损失函数和优化器\n",
    "input_dim = X_train.shape[1]\n",
    "model = RegressionModel(input_dim)\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = optim.Adam(model.parameters())\n",
    "\n",
    "# 打印模型结构\n",
    "print(model)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  10%|█         | 10/100 [00:02<00:20,  4.45it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 10/100, 训练损失: 0.3595, 验证损失: 0.4059, 全局步数: 4130\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  20%|██        | 20/100 [00:04<00:18,  4.43it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 20/100, 训练损失: 0.3259, 验证损失: 0.3747, 全局步数: 8260\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  30%|███       | 30/100 [00:06<00:16,  4.24it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 30/100, 训练损失: 0.3137, 验证损失: 0.3581, 全局步数: 12390\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  40%|████      | 40/100 [00:09<00:13,  4.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 40/100, 训练损失: 0.3062, 验证损失: 0.3430, 全局步数: 16520\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  50%|█████     | 50/100 [00:11<00:11,  4.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 50/100, 训练损失: 0.3076, 验证损失: 0.4152, 全局步数: 20650\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  60%|██████    | 60/100 [00:13<00:09,  4.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 60/100, 训练损失: 0.3043, 验证损失: 0.3612, 全局步数: 24780\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  70%|███████   | 70/100 [00:15<00:06,  4.42it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 70/100, 训练损失: 0.2983, 验证损失: 0.3413, 全局步数: 28910\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  80%|████████  | 80/100 [00:18<00:04,  4.49it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 80/100, 训练损失: 0.3037, 验证损失: 0.3421, 全局步数: 33040\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度:  90%|█████████ | 90/100 [00:20<00:02,  4.42it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 90/100, 训练损失: 0.2950, 验证损失: 0.3487, 全局步数: 37170\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "训练进度: 100%|██████████| 100/100 [00:22<00:00,  4.39it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "轮次 100/100, 训练损失: 0.2978, 验证损失: 0.3392, 全局步数: 41300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# 训练模型\n",
    "from tqdm import tqdm\n",
    "epochs = 100\n",
    "train_losses = []\n",
    "val_losses = []\n",
    "\n",
    "global_step = 0\n",
    "\n",
    "# 使用tqdm显示总体进度\n",
    "for epoch in tqdm(range(epochs), desc=\"训练进度\"):\n",
    "    # 训练模式\n",
    "    model.train()\n",
    "    running_loss = 0.0\n",
    "    running_mae = 0.0\n",
    "    \n",
    "    for inputs, targets in train_loader:\n",
    "        optimizer.zero_grad()\n",
    "        outputs = model(inputs)\n",
    "        loss = criterion(outputs, targets)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        \n",
    "        running_loss += loss.item() * inputs.size(0)\n",
    "\n",
    "        \n",
    "        # 更新全局步数\n",
    "        global_step += 1\n",
    "    \n",
    "    epoch_train_loss = running_loss / len(train_loader.dataset)\n",
    "\n",
    "    train_losses.append(epoch_train_loss)\n",
    "\n",
    "    \n",
    "    # 验证模式\n",
    "    model.eval()\n",
    "    running_loss = 0.0\n",
    "\n",
    "    \n",
    "    with torch.no_grad():\n",
    "        for inputs, targets in val_loader:\n",
    "            outputs = model(inputs)\n",
    "            loss = criterion(outputs, targets)\n",
    "            \n",
    "            running_loss += loss.item() * inputs.size(0)\n",
    "\n",
    "    epoch_val_loss = running_loss / len(val_loader.dataset)\n",
    "\n",
    "    val_losses.append(epoch_val_loss)\n",
    "\n",
    "    \n",
    "    if (epoch + 1) % 10 == 0:\n",
    "        print(f'轮次 {epoch+1}/{epochs}, 训练损失: {epoch_train_loss:.4f}, 验证损失: {epoch_val_loss:.4f}, 全局步数: {global_step}')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集 - 均方误差: 0.2919\n",
      "验证集 - 均方误差: 0.3392\n",
      "测试集 - 均方误差: 0.3236\n"
     ]
    }
   ],
   "source": [
    "# 评估模型\n",
    "def evaluate(model, dataloader, criterion):\n",
    "    model.eval()\n",
    "    running_loss = 0.0\n",
    "\n",
    "    \n",
    "    with torch.no_grad():\n",
    "        for inputs, targets in dataloader:\n",
    "            outputs = model(inputs) #前向计算\n",
    "            loss = criterion(outputs, targets) #计算损失\n",
    "            \n",
    "            running_loss += loss.item() * inputs.size(0)\n",
    "    \n",
    "    return running_loss / len(dataloader.dataset)\n",
    "\n",
    "train_loss = evaluate(model, train_loader, criterion)\n",
    "val_loss= evaluate(model, val_loader, criterion)\n",
    "test_loss = evaluate(model, test_loader, criterion)\n",
    "\n",
    "print(f\"训练集 - 均方误差: {train_loss:.4f}\")\n",
    "print(f\"验证集 - 均方误差: {val_loss:.4f}\")\n",
    "print(f\"测试集 - 均方误差: {test_loss:.4f}\")\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
